* Read the estimated sigma_hat and keep its sign-flipped mean in the local
* macro sigma_hat. The stored estimate is expected to be negative, so the
* macro holds the positive value used by the steps that follow.
clear
use "$saveoutputs/D_TFPQIV_CS_klme_nic4", clear
summarize sigma_hat
local sigma_hat = -1 * r(mean)


	* Build the (year x nic_08_2dig) parameter file that is merged onto the
	* product-level data: the _q coefficients, their _r counterparts
	* (_q scaled by delta_hat), delta_hat itself, and the coefficient sums.
	clear
	use "$saveoutputs/CS_klme_by_nic2_year"

	* delta_hat = (sigma - 1) / sigma, with sigma read from the local macro sigma_hat
	gen delta_hat = (`sigma_hat' - 1)/(`sigma_hat')

	* One _r variable per current and lagged coefficient, created in the order
	* beta_k, beta_l, beta_m, beta_e, then the lag_ versions
	foreach stub in beta lag_beta {
		foreach input in k l m e {
			gen `stub'_`input'_hat_r = `stub'_`input'_hat_q * delta_hat
		}
	}

	keep year nic_08_2dig delta_hat ///
		beta_k_hat_* beta_l_hat_* beta_m_hat_* beta_e_hat_* lag_*

	* Sum of the four _q coefficients, for the current and the lagged set
	gen RTS = beta_k_hat_q + beta_l_hat_q ///
		+ beta_m_hat_q + beta_e_hat_q
	gen lag_RTS = lag_beta_k_hat_q + lag_beta_l_hat_q ///
		+ lag_beta_m_hat_q + lag_beta_e_hat_q
	summarize RTS lag_RTS

	* Hold the parameters in a tempfile named params for the merge further down
	tempfile params
	save `params', replace

	* Assemble the product-year file used by the decomposition: attach the
	* parameters, flag N and D from the product panel, filter incomplete
	* firm-years, compute omega_j / d_j / h_j, and save the result.
	clear
	use "$working/prowess_wits_estimation_qonly_klme.dta", clear

	* Attach the year x nic_08_2dig parameters saved in the tempfile params
	merge m:1 year nic_08_2dig using `params'
	drop _merge

	tab nic_08_2dig, sort

	* Keep only rows that have both quantity (q_s) and energy_intensity
	drop if q_s==. | energy_intensity==.

	* Keep firm-years that also appear in first_last_years.dta
	merge m:1 co_code1 year using "$working/first_last_years.dta"
	drop if _merge!=3
	drop _merge

	* Declare the product panel, count rows per firm-year, then fill every
	* product_id x year cell so that L. and F. can detect absent neighbours
	sort product_id year
	xtset product_id year
	bysort co_code1 year: egen nprod = count(co_code1)
	tsfill, full
	sort product_id year

	* N = 1 when the same product_id has no row in the previous year,
	* D = 1 when it has no row in the following year
	generate N = missing(L.co_code1)
	generate D = missing(F.co_code1)

	* Blank out the flags where first_year / last_year equals 1
	replace N = . if first_year == 1
	replace D = . if last_year == 1

	* Remove the rows that tsfill created
	drop if missing(co_code1)

	* Recount rows per firm-year
	drop cleaned_number_prod
	by co_code1 year, sort: generate cleaned_number_prods = _N

	tab cleaned_number_prods

	* Drop rows with zero/missing shares or missing r, then drop every
	* firm-year that lost at least one row to those filters.
	* NOTE(review): the grouping key here is co_code while the rest of the file
	* uses co_code1 - confirm both identify the same firms.
	bysort co_code year: gen prod_count = _N
	drop if shares==0 | shares==.
	drop if r==.
	bysort co_code year: gen prod_count2 = _N
	drop if prod_count2 != prod_count

	* Collect the input contributions once. Each macro is a chain of
	* "- coefficient * (input + ln_shares)" terms in the order l, k, m, e.
	local q_now
	local q_lag
	local r_now
	local r_lag
	foreach x in l k m e {
		local q_now `q_now' - beta_`x'_hat_q * (`x' + ln_shares)
		local q_lag `q_lag' - lag_beta_`x'_hat_q * (`x' + ln_shares)
		local r_now `r_now' - beta_`x'_hat_r * (`x' + ln_shares)
		local r_lag `r_lag' - lag_beta_`x'_hat_r * (`x' + ln_shares)
	}

	* omega_j: q_s net of the _q input contributions (current and lagged coefficients)
	gen omega_j = q_s `q_now'
	gen omega_j_LPF = q_s `q_lag'

	* d_j: r net of the _r input contributions and of delta_hat * omega_j
	gen d_j = r `r_now' - delta_hat*omega_j
	gen d_j_LPF = r `r_lag' - delta_hat*omega_j_LPF

	* h_j: r net of the _r input contributions only
	gen h_j = r `r_now'
	gen h_j_LPF = r `r_lag'

	save "$saveoutputs/pre_reg_data_CS_klme_nic2_by_year", replace



	* Reload the product-year file and build, within each firm-year, the
	* delta-weighted product shares for four groups (no_D, no_N, D, N) and the
	* H / a_j / full_H_cov terms for the current and the LPF specification.
	clear
	use "$saveoutputs/pre_reg_data_CS_klme_nic2_by_year"

	egen prod_id = group(co_code1 product_name5)
	xtset prod_id year

	* Missing flags become 0; no_D and no_N are the complements of D and N
	replace N = 0 if N==.
	replace D = 0 if D==.
	gen no_D = 1 - D
	gen no_N = 1 - N

	* Exclude zero or missing shares before forming totals (the step that
	* builds the input file applies the same filter before saving it)
	drop if shares==0
	drop if shares==.

	* Number of no_D / no_N rows per firm-year
	foreach grp in no_D no_N {
		bysort co_code1 year: egen T_`grp' = total(`grp')
	}

	* delta_share: share raised to delta_hat*RTS, normalised within firm-year.
	* NOTE(review): the variable is referenced as "share" here and as "shares"
	* elsewhere - presumably a variable abbreviation; confirm.
	gen delta_share_num = share^(delta_hat*RTS)
	bysort co_code1 year: egen delta_share_denom = total(delta_share_num)
	gen delta_share = delta_share_num/delta_share_denom

	* Group indicator times delta_share, then its firm-year total
	foreach grp in no_D no_N D N {
		gen `grp'_delta_share = `grp'*delta_share
	}
	foreach grp in no_D no_N D N {
		bysort co_code1 year: egen T_`grp'_delta_share = total(`grp'_delta_share)
	}

	* Within-group shares; set to 0 where the group total is 0
	foreach grp in no_D no_N D N {
		gen `grp'_shares = (`grp'*delta_share)/T_`grp'_delta_share
	}
	foreach grp in no_D no_N D N {
		replace `grp'_shares = 0 if T_`grp'_delta_share==0
	}

	* H, a_j and full_H_cov, first from h_j / omega_j and then from the _LPF
	* versions. The local spec is empty for h_j and "_LPF" for h_j_LPF.
	foreach v in h_j h_j_LPF {
		local spec = substr("`v'", 4, .)
		gen H`spec' = delta_share_num*exp(`v')
		gen a_j`spec' = delta_hat*omega_j`spec'
		gen full_H_cov`spec' = exp(`v')*delta_share
	}

	* Unweighted firm-year means of each measure over the no_D rows and over
	* the no_N rows: UWM_x_g = total(x * g) / T_g, set to 0 when T_g is 0
	foreach m in h_j a_j d_j h_j_LPF a_j_LPF d_j_LPF {
		foreach grp in no_D no_N {
			gen `m'_`grp' = `m'*`grp'
		}
		foreach grp in no_D no_N {
			bysort co_code1 year: egen T_`m'_`grp' = total(`m'_`grp')
		}
		foreach grp in no_D no_N {
			gen UWM_`m'_`grp' = T_`m'_`grp'/T_`grp'
		}
		foreach grp in no_D no_N {
			replace UWM_`m'_`grp' = 0 if T_`grp'==0
		}
	}

	* Share-weighted exponentials, for h_j and then for h_j_LPF.
	* The local spec is empty for h_j and "_LPF" for h_j_LPF.
	foreach v in h_j h_j_LPF {
		local spec = substr("`v'", 4, .)

		* Deviation of the measure from its unweighted group mean
		foreach grp in no_D no_N {
			gen `v'_hat_`grp' = `v' - UWM_`v'_`grp'
		}

		* exp(deviation) weighted by the within-group share
		foreach grp in no_D no_N {
			gen `grp'_H_hat_cov`spec' = exp(`v'_hat_`grp')*`grp'_shares
		}

		* exp(level) weighted by the within-group share, all four groups
		foreach grp in no_D no_N D N {
			gen `grp'_H_cov`spec' = exp(`v')*`grp'_shares
		}
	}


	* multi_prod = 1 for firms that have more rows than distinct years, i.e.
	* more than one product in at least one year
	bysort co_code1: egen fobs = count(year)
	egen tag = tag(co_code1 year)
	egen fobsy = total(tag), by(co_code1)
	generate multi_prod = (fobs > fobsy)

	* Firms with one row per year but more than one distinct product_name5
	* (they switch product over time) are flagged as well.
	* nvals() is not a built-in egen function - presumably from the egenmore package.
	bysort co_code1: egen sw = nvals(product_name5) if multi_prod == 0
	replace multi_prod = 1 if sw != . & sw > 1

	* Collapse to one row per firm-year: means for variables that are
	* firm-year means or flags, sums for the product-level terms
	local mean_vars first_year last_year ///
		UWM_d_j_no_D UWM_d_j_no_N UWM_a_j_no_N UWM_a_j_no_D UWM_h_j_no_D UWM_h_j_no_N ///
		UWM_d_j_LPF_no_D UWM_d_j_LPF_no_N UWM_a_j_LPF_no_N UWM_a_j_LPF_no_D UWM_h_j_LPF_no_D UWM_h_j_LPF_no_N ///
		multi_prod
	local sum_vars no_D no_N N D ///
		H full_H_cov delta_share_num no_D_H_cov no_N_H_cov D_H_cov N_H_cov no_D_H_hat_cov no_N_H_hat_cov ///
		H_LPF full_H_cov_LPF no_D_H_cov_LPF no_N_H_cov_LPF D_H_cov_LPF N_H_cov_LPF no_D_H_hat_cov_LPF no_N_H_hat_cov_LPF ///
		no_D_shares no_N_shares D_shares N_shares D_delta_share N_delta_share
	collapse (mean) `mean_vars' (sum) `sum_vars', by(co_code1 year)

	* Consistency check: both splits should count the same number of products
	gen total_prods_1 = no_D + D
	gen total_prods_2 = no_N + N
	gen test = total_prods_1 - total_prods_2

	summarize test if first_year !=1 & last_year !=1
	summarize total_prods_1 if last_year !=1
	summarize total_prods_2 if first_year !=1

	* Natural logs of the summed terms
	local to_log H no_D_H_cov no_N_H_cov D_H_cov N_H_cov delta_share_num full_H_cov no_D_H_hat_cov no_N_H_hat_cov ///
		H_LPF no_D_H_cov_LPF no_N_H_cov_LPF D_H_cov_LPF N_H_cov_LPF full_H_cov_LPF no_D_H_hat_cov_LPF no_N_H_hat_cov_LPF
	foreach v of local to_log {
		gen ln_`v' = ln(`v')
	}

	* Consistency check: ln_H against ln_full_H_cov + ln_delta_share_num
	gen ln_H_alt = ln_full_H_cov + ln_delta_share_num
	gen tester = ln_H - ln_H_alt
	summarize tester if first_year ==1 | last_year ==1, detail
	summarize tester, detail

	xtset co_code1 year

	* Year-on-year change: this year's LPF value minus last year's value
	gen full_dif_TFPR = ln_H_LPF - L.ln_H

	* Change in the unweighted means: this year's no_N mean (LPF) minus last
	* year's no_D mean, for d, a and h
	foreach c in d a h {
		gen dif_`c'_bar = UWM_`c'_j_LPF_no_N - L.UWM_`c'_j_no_D
	}

	gen dif_cov_stay = ln_no_N_H_hat_cov_LPF - L.ln_no_D_H_hat_cov
	gen dif_var_adj = D.ln_delta_share_num

	* Terms built from the N rows this year and from the D rows last year
	gen adding_term = ln(1 + N_delta_share*((N_H_cov_LPF - no_N_H_cov_LPF)/(no_N_H_cov_LPF)))
	gen drop_term = -ln(1 + L.D_delta_share*((L.D_H_cov - L.no_D_H_cov)/(L.no_D_H_cov)))

	* Sum of the components, to be compared with full_dif_TFPR
	gen full_dif_TFPR_alt = dif_d_bar + dif_a_bar + dif_cov_stay ///
		+ dif_var_adj + adding_term + drop_term

	save "$saveoutputs/add_drop_categories_CS_klme_nic2_by_year", replace

	* Collapse the product-year file to firm-year level, compute firm-level
	* r and tfpr, and attach the add/drop decomposition saved above.
	clear
	use "$saveoutputs/pre_reg_data_CS_klme_nic2_by_year"

	* Trade variables: the six base series followed by their wIPT_ and rwIPT_
	* prefixed versions, in that order
	local flows IND_CHN IND_WLD LMI_CHN LMI_WLD LMI_WLD_EXCL_IND LMI_CHN_EXCL_IND
	local trade `flows'
	foreach pre in wIPT rwIPT {
		foreach f of local flows {
			local trade `trade' `pre'_`f'
		}
	}

	* sales_value is summed over a firm-year's rows; everything else is averaged
	collapse (sum) sales_value ///
		(mean) beta_* delta_* l k m e ln_IND ln_IVS cleaned_number_prods I_sq I_sq_* `trade', ///
		by(co_code1 year nic_08_2dig nic_08_4dig)

	* Firm-level r is the log of summed sales; tfpr nets out the _r input terms
	generate r = ln(sales_value)
	generate tfpr = r - beta_k_hat_r * k - beta_l_hat_r * l ///
		- beta_m_hat_r * m - beta_e_hat_r * e

	* Keep only firm-years present in both files
	merge 1:1 co_code1 year using "$saveoutputs/add_drop_categories_CS_klme_nic2_by_year"
	drop if _merge != 3
	drop _merge

	save "$saveoutputs/decomposition_SS_CS_klme_nic2_by_year", replace
